/* diff - compare files line by line

   Copyright (C) 1988-1989, 1992-1994, 1996, 1998, 2001-2002, 2004, 2006-2007,
   2009-2013, 2015 Free Software Foundation, Inc.

   This file is part of GNU DIFF.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#define GDIFF_MAIN
#include "diff.h"
#include <assert.h>
#include "paths.h"
#include <c-stack.h>
#include <dirname.h>
#include <error.h>
#include <exclude.h>
#include <exitfail.h>
#include <filenamecat.h>
#include <file-type.h>
#include <fnmatch.h>
#include <getopt.h>
#include <hard-locale.h>
#include <prepargs.h>
#include <progname.h>
#include <sh-quote.h>
#include <stat-time.h>
#include <timespec.h>
#include <version-etc.h>
#include <xalloc.h>
#include <xreadlink.h>
#include <binary-io.h>

/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "diff"

#define AUTHORS \
  proper_name ("Paul Eggert"), \
  proper_name ("Mike Haertel"), \
  proper_name ("David Hayes"), \
  proper_name ("Richard Stallman"), \
  proper_name ("Len Tower")

#ifndef GUTTER_WIDTH_MINIMUM
# define GUTTER_WIDTH_MINIMUM 3
#endif

struct regexp_list
{
  char *regexps;	/* chars representing disjunction of the regexps */
  size_t len;		/* chars used in 'regexps' */
  size_t size;		/* size malloc'ed for 'regexps'; 0 if not malloc'ed */
  bool multiple_regexps;/* Does 'regexps' represent a disjunction?  */
  struct re_pattern_buffer *buf;
};

static int compare_files (struct comparison const *, char const *, char const *);
static void add_regexp (struct regexp_list *, char const *);
static void summarize_regexp_list (struct regexp_list *);
static void specify_style (enum output_style);
static void specify_value (char const **, char const *, char const *);
static void try_help (char const *, char const *) __attribute__((noreturn));
static void check_stdout (void);
static void usage (void);

/* If comparing directories, compare their common subdirectories
   recursively.  */
static bool recursive;

/* In context diffs, show previous lines that match these regexps.  */
static struct regexp_list function_regexp_list;

/* Ignore changes affecting only lines that match these regexps.  */
static struct regexp_list ignore_regexp_list;

#if O_BINARY
/* Use binary I/O when reading and writing data (--binary).
   On POSIX hosts, this has no effect.  */
static bool binary;
#else
enum { binary = true };
#endif

/* If one file is missing, treat it as present but empty (-N).  */
static bool new_file;

/* If the first file is missing, treat it as present but empty
   (--unidirectional-new-file).  */
static bool unidirectional_new_file;

/* Report files compared that are the same (-s).
   Normally nothing is output when that happens.  */
static bool report_identical_files;

static char const shortopts[] =
"0123456789abBcC:dD:eEfF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:yZ";

/* Values for long options that do not have single-letter equivalents.  */
enum
{
  BINARY_OPTION = CHAR_MAX + 1,
  FROM_FILE_OPTION,
  HELP_OPTION,
  HORIZON_LINES_OPTION,
  IGNORE_FILE_NAME_CASE_OPTION,
  INHIBIT_HUNK_MERGE_OPTION,
  LEFT_COLUMN_OPTION,
  LINE_FORMAT_OPTION,
  NO_DEREFERENCE_OPTION,
  NO_IGNORE_FILE_NAME_CASE_OPTION,
  NORMAL_OPTION,
  SDIFF_MERGE_ASSIST_OPTION,
  STRIP_TRAILING_CR_OPTION,
  SUPPRESS_BLANK_EMPTY_OPTION,
  SUPPRESS_COMMON_LINES_OPTION,
  TABSIZE_OPTION,
  TO_FILE_OPTION,

  /* These options must be in sequence.  */
  UNCHANGED_LINE_FORMAT_OPTION,
  OLD_LINE_FORMAT_OPTION,
  NEW_LINE_FORMAT_OPTION,

  /* These options must be in sequence.  */
  UNCHANGED_GROUP_FORMAT_OPTION,
  OLD_GROUP_FORMAT_OPTION,
  NEW_GROUP_FORMAT_OPTION,
  CHANGED_GROUP_FORMAT_OPTION
};

static char const group_format_option[][sizeof "--unchanged-group-format"] =
  {
    "--unchanged-group-format",
    "--old-group-format",
    "--new-group-format",
    "--changed-group-format"
  };

static char const line_format_option[][sizeof "--unchanged-line-format"] =
  {
    "--unchanged-line-format",
    "--old-line-format",
    "--new-line-format"
  };

static struct option const longopts[] =
{
  {"binary", 0, 0, BINARY_OPTION},
  {"brief", 0, 0, 'q'},
  {"changed-group-format", 1, 0, CHANGED_GROUP_FORMAT_OPTION},
  {"context", 2, 0, 'C'},
  {"ed", 0, 0, 'e'},
  {"exclude", 1, 0, 'x'},
  {"exclude-from", 1, 0, 'X'},
  {"expand-tabs", 0, 0, 't'},
  {"forward-ed", 0, 0, 'f'},
  {"from-file", 1, 0, FROM_FILE_OPTION},
  {"help", 0, 0, HELP_OPTION},
  {"horizon-lines", 1, 0, HORIZON_LINES_OPTION},
  {"ifdef", 1, 0, 'D'},
  {"ignore-all-space", 0, 0, 'w'},
  {"ignore-blank-lines", 0, 0, 'B'},
  {"ignore-case", 0, 0, 'i'},
  {"ignore-file-name-case", 0, 0, IGNORE_FILE_NAME_CASE_OPTION},
  {"ignore-matching-lines", 1, 0, 'I'},
  {"ignore-space-change", 0, 0, 'b'},
  {"ignore-tab-expansion", 0, 0, 'E'},
  {"ignore-trailing-space", 0, 0, 'Z'},
  {"inhibit-hunk-merge", 0, 0, INHIBIT_HUNK_MERGE_OPTION},
  {"initial-tab", 0, 0, 'T'},
  {"label", 1, 0, 'L'},
  {"left-column", 0, 0, LEFT_COLUMN_OPTION},
  {"line-format", 1, 0, LINE_FORMAT_OPTION},
  {"minimal", 0, 0, 'd'},
  {"new-file", 0, 0, 'N'},
  {"new-group-format", 1, 0, NEW_GROUP_FORMAT_OPTION},
  {"new-line-format", 1, 0, NEW_LINE_FORMAT_OPTION},
  {"no-dereference", 0, 0, NO_DEREFERENCE_OPTION},
  {"no-ignore-file-name-case", 0, 0, NO_IGNORE_FILE_NAME_CASE_OPTION},
  {"normal", 0, 0, NORMAL_OPTION},
  {"old-group-format", 1, 0, OLD_GROUP_FORMAT_OPTION},
  {"old-line-format", 1, 0, OLD_LINE_FORMAT_OPTION},
  {"paginate", 0, 0, 'l'},
  {"rcs", 0, 0, 'n'},
  {"recursive", 0, 0, 'r'},
  {"report-identical-files", 0, 0, 's'},
  {"sdiff-merge-assist", 0, 0, SDIFF_MERGE_ASSIST_OPTION},
  {"show-c-function", 0, 0, 'p'},
  {"show-function-line", 1, 0, 'F'},
  {"side-by-side", 0, 0, 'y'},
  {"speed-large-files", 0, 0, 'H'},
  {"starting-file", 1, 0, 'S'},
  {"strip-trailing-cr", 0, 0, STRIP_TRAILING_CR_OPTION},
  {"suppress-blank-empty", 0, 0, SUPPRESS_BLANK_EMPTY_OPTION},
  {"suppress-common-lines", 0, 0, SUPPRESS_COMMON_LINES_OPTION},
  {"tabsize", 1, 0, TABSIZE_OPTION},
  {"text", 0, 0, 'a'},
  {"to-file", 1, 0, TO_FILE_OPTION},
  {"unchanged-group-format", 1, 0, UNCHANGED_GROUP_FORMAT_OPTION},
  {"unchanged-line-format", 1, 0, UNCHANGED_LINE_FORMAT_OPTION},
  {"unidirectional-new-file", 0, 0, 'P'},
  {"unified", 2, 0, 'U'},
  {"version", 0, 0, 'v'},
  {"width", 1, 0, 'W'},
  {0, 0, 0, 0}
};

/* Return a string containing the command options with which diff was invoked.
   Spaces appear between what were separate ARGV-elements.
   There is a space at the beginning but none at the end.
   If there were no options, the result is an empty string.

   Arguments: OPTIONVEC, a vector containing separate ARGV-elements, and COUNT,
   the length of that vector.  */

static char *
option_list (char **optionvec, int count)
{
  int i;
  size_t size = 1;
  char *result;
  char *p;

  for (i = 0; i < count; i++)
    size += 1 + shell_quote_length (optionvec[i]);

  p = result = xmalloc (size);

  for (i = 0; i < count; i++)
    {
      *p++ = ' ';
      p = shell_quote_copy (p, optionvec[i]);
    }

  *p = '\0';
  return result;
}


/* Return an option value suitable for add_exclude.  */

static int
exclude_options (void)
{
  return EXCLUDE_WILDCARDS | (ignore_file_name_case ? FNM_CASEFOLD : 0);
}

int
main (int argc, char **argv)
{
  int exit_status = EXIT_SUCCESS;
  int c;
  int i;
  int prev = -1;
  lin ocontext = -1;
  bool explicit_context = false;
  size_t width = 0;
  bool show_c_function = false;
  char const *from_file = NULL;
  char const *to_file = NULL;
  uintmax_t numval;
  char *numend;

  /* Do our initializations.  */
  exit_failure = EXIT_TROUBLE;
  initialize_main (&argc, &argv);
  set_program_name (argv[0]);
  setlocale (LC_ALL, "");
  bindtextdomain (PACKAGE, LOCALEDIR);
  textdomain (PACKAGE);
  c_stack_action (0);
  function_regexp_list.buf = &function_regexp;
  ignore_regexp_list.buf = &ignore_regexp;
  re_set_syntax (RE_SYNTAX_GREP | RE_NO_POSIX_BACKTRACKING);
  excluded = new_exclude ();

  /* Decode the options.  */

  while ((c = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1)
    {
      switch (c)
	{
	case 0:
	  break;

	case '0':
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
	case '8':
	case '9':
	  ocontext = (! ISDIGIT (prev)
		      ? c - '0'
		      : (ocontext - (c - '0' <= CONTEXT_MAX % 10)
			 < CONTEXT_MAX / 10)
		      ? 10 * ocontext + (c - '0')
		      : CONTEXT_MAX);
	  break;

	case 'a':
	  text = true;
	  break;

	case 'b':
	  if (ignore_white_space < IGNORE_SPACE_CHANGE)
	    ignore_white_space = IGNORE_SPACE_CHANGE;
	  break;

	case 'Z':
	  if (ignore_white_space < IGNORE_SPACE_CHANGE)
	    ignore_white_space |= IGNORE_TRAILING_SPACE;
	  break;

	case 'B':
	  ignore_blank_lines = true;
	  break;

	case 'C':
	case 'U':
	  {
	    if (optarg)
	      {
		numval = strtoumax (optarg, &numend, 10);
		if (*numend)
		  try_help ("invalid context length '%s'", optarg);
		if (CONTEXT_MAX < numval)
		  numval = CONTEXT_MAX;
	      }
	    else
	      numval = 3;

	    specify_style (c == 'U' ? OUTPUT_UNIFIED : OUTPUT_CONTEXT);
	    if (context < numval)
	      context = numval;
	    explicit_context = true;
	  }
	  break;

	case 'c':
	  specify_style (OUTPUT_CONTEXT);
	  if (context < 3)
	    context = 3;
	  break;

	case 'd':
	  minimal = true;
	  break;

	case 'D':
	  specify_style (OUTPUT_IFDEF);
	  {
	    static char const C_ifdef_group_formats[] =
	      "%%=%c#ifndef %s\n%%<#endif /* ! %s */\n%c#ifdef %s\n%%>#endif /* %s */\n%c#ifndef %s\n%%<#else /* %s */\n%%>#endif /* %s */\n";
	    char *b = xmalloc (sizeof C_ifdef_group_formats
			       + 7 * strlen (optarg) - 14 /* 7*"%s" */
			       - 8 /* 5*"%%" + 3*"%c" */);
	    sprintf (b, C_ifdef_group_formats,
		     0,
		     optarg, optarg, 0,
		     optarg, optarg, 0,
		     optarg, optarg, optarg);
	    for (i = 0; i < sizeof group_format / sizeof group_format[0]; i++)
	      {
		specify_value (&group_format[i], b, "-D");
		b += strlen (b) + 1;
	      }
	  }
	  break;

	case 'e':
	  specify_style (OUTPUT_ED);
	  break;

	case 'E':
	  if (ignore_white_space < IGNORE_SPACE_CHANGE)
	    ignore_white_space |= IGNORE_TAB_EXPANSION;
	  break;

	case 'f':
	  specify_style (OUTPUT_FORWARD_ED);
	  break;

	case 'F':
	  add_regexp (&function_regexp_list, optarg);
	  break;

	case 'h':
	  /* Split the files into chunks for faster processing.
	     Usually does not change the result.

	     This currently has no effect.  */
	  break;

	case 'H':
	  speed_large_files = true;
	  break;

	case 'i':
	  ignore_case = true;
	  break;

	case 'I':
	  add_regexp (&ignore_regexp_list, optarg);
	  break;

	case 'l':
	  if (!pr_program[0])
	    try_help ("pagination not supported on this host", NULL);
	  paginate = true;
#ifdef SIGCHLD
	  /* Pagination requires forking and waiting, and
	     System V fork+wait does not work if SIGCHLD is ignored.  */
	  signal (SIGCHLD, SIG_DFL);
#endif
	  break;

	case 'L':
	  if (!file_label[0])
	    file_label[0] = optarg;
	  else if (!file_label[1])
	    file_label[1] = optarg;
	  else
	    fatal ("too many file label options");
	  break;

	case 'n':
	  specify_style (OUTPUT_RCS);
	  break;

	case 'N':
	  new_file = true;
	  break;

	case 'p':
	  show_c_function = true;
	  add_regexp (&function_regexp_list, "^[[:alpha:]$_]");
	  break;

	case 'P':
	  unidirectional_new_file = true;
	  break;

	case 'q':
	  brief = true;
	  break;

	case 'r':
	  recursive = true;
	  break;

	case 's':
	  report_identical_files = true;
	  break;

	case 'S':
	  specify_value (&starting_file, optarg, "-S");
	  break;

	case 't':
	  expand_tabs = true;
	  break;

	case 'T':
	  initial_tab = true;
	  break;

	case 'u':
	  specify_style (OUTPUT_UNIFIED);
	  if (context < 3)
	    context = 3;
	  break;

	case 'v':
	  version_etc (stdout, PROGRAM_NAME, PACKAGE_NAME, Version,
		       AUTHORS, (char *) NULL);
	  check_stdout ();
	  return EXIT_SUCCESS;

	case 'w':
	  ignore_white_space = IGNORE_ALL_SPACE;
	  break;

	case 'x':
	  add_exclude (excluded, optarg, exclude_options ());
	  break;

	case 'X':
	  if (add_exclude_file (add_exclude, excluded, optarg,
				exclude_options (), '\n'))
	    pfatal_with_name (optarg);
	  break;

	case 'y':
	  specify_style (OUTPUT_SDIFF);
	  break;

	case 'W':
	  numval = strtoumax (optarg, &numend, 10);
	  if (! (0 < numval && numval <= SIZE_MAX) || *numend)
	    try_help ("invalid width '%s'", optarg);
	  if (width != numval)
	    {
	      if (width)
		fatal ("conflicting width options");
	      width = numval;
	    }
	  break;

	case BINARY_OPTION:
#if O_BINARY
	  binary = true;
	  if (! isatty (STDOUT_FILENO))
	    set_binary_mode (STDOUT_FILENO, O_BINARY);
#endif
	  break;

	case FROM_FILE_OPTION:
	  specify_value (&from_file, optarg, "--from-file");
	  break;

	case HELP_OPTION:
	  usage ();
	  check_stdout ();
	  return EXIT_SUCCESS;

	case HORIZON_LINES_OPTION:
	  numval = strtoumax (optarg, &numend, 10);
	  if (*numend)
	    try_help ("invalid horizon length '%s'", optarg);
	  horizon_lines = MAX (horizon_lines, MIN (numval, LIN_MAX));
	  break;

	case IGNORE_FILE_NAME_CASE_OPTION:
	  ignore_file_name_case = true;
	  break;

	case INHIBIT_HUNK_MERGE_OPTION:
	  /* This option is obsolete, but accept it for backward
             compatibility.  */
	  break;

	case LEFT_COLUMN_OPTION:
	  left_column = true;
	  break;

	case LINE_FORMAT_OPTION:
	  specify_style (OUTPUT_IFDEF);
	  for (i = 0; i < sizeof line_format / sizeof line_format[0]; i++)
	    specify_value (&line_format[i], optarg, "--line-format");
	  break;

	case NO_DEREFERENCE_OPTION:
	  no_dereference_symlinks = true;
	  break;

	case NO_IGNORE_FILE_NAME_CASE_OPTION:
	  ignore_file_name_case = false;
	  break;

	case NORMAL_OPTION:
	  specify_style (OUTPUT_NORMAL);
	  break;

	case SDIFF_MERGE_ASSIST_OPTION:
	  specify_style (OUTPUT_SDIFF);
	  sdiff_merge_assist = true;
	  break;

	case STRIP_TRAILING_CR_OPTION:
	  strip_trailing_cr = true;
	  break;

	case SUPPRESS_BLANK_EMPTY_OPTION:
	  suppress_blank_empty = true;
	  break;

	case SUPPRESS_COMMON_LINES_OPTION:
	  suppress_common_lines = true;
	  break;

	case TABSIZE_OPTION:
	  numval = strtoumax (optarg, &numend, 10);
	  if (! (0 < numval && numval <= SIZE_MAX - GUTTER_WIDTH_MINIMUM)
	      || *numend)
	    try_help ("invalid tabsize '%s'", optarg);
	  if (tabsize != numval)
	    {
	      if (tabsize)
		fatal ("conflicting tabsize options");
	      tabsize = numval;
	    }
	  break;

	case TO_FILE_OPTION:
	  specify_value (&to_file, optarg, "--to-file");
	  break;

	case UNCHANGED_LINE_FORMAT_OPTION:
	case OLD_LINE_FORMAT_OPTION:
	case NEW_LINE_FORMAT_OPTION:
	  specify_style (OUTPUT_IFDEF);
	  c -= UNCHANGED_LINE_FORMAT_OPTION;
	  specify_value (&line_format[c], optarg, line_format_option[c]);
	  break;

	case UNCHANGED_GROUP_FORMAT_OPTION:
	case OLD_GROUP_FORMAT_OPTION:
	case NEW_GROUP_FORMAT_OPTION:
	case CHANGED_GROUP_FORMAT_OPTION:
	  specify_style (OUTPUT_IFDEF);
	  c -= UNCHANGED_GROUP_FORMAT_OPTION;
	  specify_value (&group_format[c], optarg, group_format_option[c]);
	  break;

	default:
	  try_help (NULL, NULL);
	}
      prev = c;
    }

  if (output_style == OUTPUT_UNSPECIFIED)
    {
      if (show_c_function)
	{
	  specify_style (OUTPUT_CONTEXT);
	  if (ocontext < 0)
	    context = 3;
	}
      else
	specify_style (OUTPUT_NORMAL);
    }

  if (output_style != OUTPUT_CONTEXT || hard_locale (LC_TIME))
    {
#if (defined STAT_TIMESPEC || defined STAT_TIMESPEC_NS \
     || defined HAVE_STRUCT_STAT_ST_SPARE1)
      time_format = "%Y-%m-%d %H:%M:%S.%N %z";
#else
      time_format = "%Y-%m-%d %H:%M:%S %z";
#endif
    }
  else
    {
      /* See POSIX 1003.1-2001 for this format.  */
      time_format = "%a %b %e %T %Y";
    }

  if (0 <= ocontext
      && (output_style == OUTPUT_CONTEXT
	  || output_style == OUTPUT_UNIFIED)
      && (context < ocontext
	  || (ocontext < context && ! explicit_context)))
    context = ocontext;

  if (! tabsize)
    tabsize = 8;
  if (! width)
    width = 130;

  {
    /* Maximize first the half line width, and then the gutter width,
       according to the following constraints:

	1.  Two half lines plus a gutter must fit in a line.
	2.  If the half line width is nonzero:
	    a.  The gutter width is at least GUTTER_WIDTH_MINIMUM.
	    b.  If tabs are not expanded to spaces,
		a half line plus a gutter is an integral number of tabs,
		so that tabs in the right column line up.  */

    size_t t = expand_tabs ? 1 : tabsize;
    size_t w = width;
    size_t t_plus_g = t + GUTTER_WIDTH_MINIMUM;
    size_t unaligned_off = (w >> 1) + (t_plus_g >> 1) + (w & t_plus_g & 1);
    size_t off = unaligned_off - unaligned_off % t;
    sdiff_half_width = (off <= GUTTER_WIDTH_MINIMUM || w <= off
			? 0
			: MIN (off - GUTTER_WIDTH_MINIMUM, w - off));
    sdiff_column2_offset = sdiff_half_width ? off : w;
  }

  /* Make the horizon at least as large as the context, so that
     shift_boundaries has more freedom to shift the first and last hunks.  */
  if (horizon_lines < context)
    horizon_lines = context;

  summarize_regexp_list (&function_regexp_list);
  summarize_regexp_list (&ignore_regexp_list);

  if (output_style == OUTPUT_IFDEF)
    {
      for (i = 0; i < sizeof line_format / sizeof line_format[0]; i++)
	if (!line_format[i])
	  line_format[i] = "%l\n";
      if (!group_format[OLD])
	group_format[OLD]
	  = group_format[CHANGED] ? group_format[CHANGED] : "%<";
      if (!group_format[NEW])
	group_format[NEW]
	  = group_format[CHANGED] ? group_format[CHANGED] : "%>";
      if (!group_format[UNCHANGED])
	group_format[UNCHANGED] = "%=";
      if (!group_format[CHANGED])
	group_format[CHANGED] = concat (group_format[OLD],
					group_format[NEW], "");
    }

  no_diff_means_no_output =
    (output_style == OUTPUT_IFDEF ?
      (!*group_format[UNCHANGED]
       || (STREQ (group_format[UNCHANGED], "%=")
	   && !*line_format[UNCHANGED]))
     : (output_style != OUTPUT_SDIFF) | suppress_common_lines);

  files_can_be_treated_as_binary =
    (brief & binary
     & ~ (ignore_blank_lines | ignore_case | strip_trailing_cr
	  | (ignore_regexp_list.regexps || ignore_white_space)));

  switch_string = option_list (argv + 1, optind - 1);

  if (from_file)
    {
      if (to_file)
	fatal ("--from-file and --to-file both specified");
      else
	for (; optind < argc; optind++)
	  {
	    int status = compare_files (NULL, from_file, argv[optind]);
	    if (exit_status < status)
	      exit_status = status;
	  }
    }
  else
    {
      if (to_file)
	for (; optind < argc; optind++)
	  {
	    int status = compare_files (NULL, argv[optind], to_file);
	    if (exit_status < status)
	      exit_status = status;
	  }
      else
	{
	  if (argc - optind != 2)
	    {
	      if (argc - optind < 2)
		try_help ("missing operand after '%s'", argv[argc - 1]);
	      else
		try_help ("extra operand '%s'", argv[optind + 2]);
	    }

	  exit_status = compare_files (NULL, argv[optind], argv[optind + 1]);
	}
    }

  /* Print any messages that were saved up for last.  */
  print_message_queue ();

  check_stdout ();
  exit (exit_status);
  return exit_status;
}

/* Append to REGLIST the regexp PATTERN.  */

static void
add_regexp (struct regexp_list *reglist, char const *pattern)
{
  size_t patlen = strlen (pattern);
  char const *m = re_compile_pattern (pattern, patlen, reglist->buf);

  if (m != 0)
    error (0, 0, "%s: %s", pattern, m);
  else
    {
      char *regexps = reglist->regexps;
      size_t len = reglist->len;
      bool multiple_regexps = reglist->multiple_regexps = regexps != 0;
      size_t newlen = reglist->len = len + 2 * multiple_regexps + patlen;
      size_t size = reglist->size;

      if (size <= newlen)
	{
	  if (!size)
	    size = 1;

	  do size *= 2;
	  while (size <= newlen);

	  reglist->size = size;
	  reglist->regexps = regexps = xrealloc (regexps, size);
	}
      if (multiple_regexps)
	{
	  regexps[len++] = '\\';
	  regexps[len++] = '|';
	}
      memcpy (regexps + len, pattern, patlen + 1);
    }
}

/* Ensure that REGLIST represents the disjunction of its regexps.
   This is done here, rather than earlier, to avoid O(N^2) behavior.  */

static void
summarize_regexp_list (struct regexp_list *reglist)
{
  if (reglist->regexps)
    {
      /* At least one regexp was specified.  Allocate a fastmap for it.  */
      reglist->buf->fastmap = xmalloc (1 << CHAR_BIT);
      if (reglist->multiple_regexps)
	{
	  /* Compile the disjunction of the regexps.
	     (If just one regexp was specified, it is already compiled.)  */
	  char const *m = re_compile_pattern (reglist->regexps, reglist->len,
					      reglist->buf);
	  if (m)
	    error (EXIT_TROUBLE, 0, "%s: %s", reglist->regexps, m);
	}
    }
}

static void
try_help (char const *reason_msgid, char const *operand)
{
  if (reason_msgid)
    error (0, 0, _(reason_msgid), operand);
  error (EXIT_TROUBLE, 0, _("Try '%s --help' for more information."),
	 program_name);
  abort ();
}

static void
check_stdout (void)
{
  if (ferror (stdout))
    fatal ("write failed");
  else if (fclose (stdout) != 0)
    pfatal_with_name (_("standard output"));
}

static char const * const option_help_msgid[] = {
  N_("    --normal                  output a normal diff (the default)"),
  N_("-q, --brief                   report only when files differ"),
  N_("-s, --report-identical-files  report when two files are the same"),
  N_("-c, -C NUM, --context[=NUM]   output NUM (default 3) lines of copied context"),
  N_("-u, -U NUM, --unified[=NUM]   output NUM (default 3) lines of unified context"),
  N_("-e, --ed                      output an ed script"),
  N_("-n, --rcs                     output an RCS format diff"),
  N_("-y, --side-by-side            output in two columns"),
  N_("-W, --width=NUM               output at most NUM (default 130) print columns"),
  N_("    --left-column             output only the left column of common lines"),
  N_("    --suppress-common-lines   do not output common lines"),
  "",
  N_("-p, --show-c-function         show which C function each change is in"),
  N_("-F, --show-function-line=RE   show the most recent line matching RE"),
  N_("    --label LABEL             use LABEL instead of file name\n"
     "                                (can be repeated)"),
  "",
  N_("-t, --expand-tabs             expand tabs to spaces in output"),
  N_("-T, --initial-tab             make tabs line up by prepending a tab"),
  N_("    --tabsize=NUM             tab stops every NUM (default 8) print columns"),
  N_("    --suppress-blank-empty    suppress space or tab before empty output lines"),
  N_("-l, --paginate                pass output through 'pr' to paginate it"),
  "",
  N_("-r, --recursive                 recursively compare any subdirectories found"),
  N_("    --no-dereference            don't follow symbolic links"),
  N_("-N, --new-file                  treat absent files as empty"),
  N_("    --unidirectional-new-file   treat absent first files as empty"),
  N_("    --ignore-file-name-case     ignore case when comparing file names"),
  N_("    --no-ignore-file-name-case  consider case when comparing file names"),
  N_("-x, --exclude=PAT               exclude files that match PAT"),
  N_("-X, --exclude-from=FILE         exclude files that match any pattern in FILE"),
  N_("-S, --starting-file=FILE        start with FILE when comparing directories"),
  N_("    --from-file=FILE1           compare FILE1 to all operands;\n"
     "                                  FILE1 can be a directory"),
  N_("    --to-file=FILE2             compare all operands to FILE2;\n"
     "                                  FILE2 can be a directory"),
  "",
  N_("-i, --ignore-case               ignore case differences in file contents"),
  N_("-E, --ignore-tab-expansion      ignore changes due to tab expansion"),
  N_("-Z, --ignore-trailing-space     ignore white space at line end"),
  N_("-b, --ignore-space-change       ignore changes in the amount of white space"),
  N_("-w, --ignore-all-space          ignore all white space"),
  N_("-B, --ignore-blank-lines        ignore changes where lines are all blank"),
  N_("-I, --ignore-matching-lines=RE  ignore changes where all lines match RE"),
  "",
  N_("-a, --text                      treat all files as text"),
  N_("    --strip-trailing-cr         strip trailing carriage return on input"),
#if O_BINARY
  N_("    --binary                    read and write data in binary mode"),
#endif
  "",
  N_("-D, --ifdef=NAME                output merged file with '#ifdef NAME' diffs"),
  N_("    --GTYPE-group-format=GFMT   format GTYPE input groups with GFMT"),
  N_("    --line-format=LFMT          format all input lines with LFMT"),
  N_("    --LTYPE-line-format=LFMT    format LTYPE input lines with LFMT"),
  N_("  These format options provide fine-grained control over the output\n"
     "    of diff, generalizing -D/--ifdef."),
  N_("  LTYPE is 'old', 'new', or 'unchanged'.  GTYPE is LTYPE or 'changed'."),
  N_("  GFMT (only) may contain:\n\
    %<  lines from FILE1\n\
    %>  lines from FILE2\n\
    %=  lines common to FILE1 and FILE2\n\
    %[-][WIDTH][.[PREC]]{doxX}LETTER  printf-style spec for LETTER\n\
      LETTERs are as follows for new group, lower case for old group:\n\
        F  first line number\n\
        L  last line number\n\
        N  number of lines = L-F+1\n\
        E  F-1\n\
        M  L+1\n\
    %(A=B?T:E)  if A equals B then T else E"),
  N_("  LFMT (only) may contain:\n\
    %L  contents of line\n\
    %l  contents of line, excluding any trailing newline\n\
    %[-][WIDTH][.[PREC]]{doxX}n  printf-style spec for input line number"),
  N_("  Both GFMT and LFMT may contain:\n\
    %%  %\n\
    %c'C'  the single character C\n\
    %c'\\OOO'  the character with octal code OOO\n\
    C    the character C (other characters represent themselves)"),
  "",
  N_("-d, --minimal            try hard to find a smaller set of changes"),
  N_("    --horizon-lines=NUM  keep NUM lines of the common prefix and suffix"),
  N_("    --speed-large-files  assume large files and many scattered small changes"),
  "",
  N_("    --help               display this help and exit"),
  N_("-v, --version            output version information and exit"),
  "",
  N_("FILES are 'FILE1 FILE2' or 'DIR1 DIR2' or 'DIR FILE' or 'FILE DIR'."),
  N_("If --from-file or --to-file is given, there are no restrictions on FILE(s)."),
  N_("If a FILE is '-', read standard input."),
  N_("Exit status is 0 if inputs are the same, 1 if different, 2 if trouble."),
  0
};

static void
usage (void)
{
  char const * const *p;

  printf (_("Usage: %s [OPTION]... FILES\n"), program_name);
  printf ("%s\n\n", _("Compare FILES line by line."));

  fputs (_("\
Mandatory arguments to long options are mandatory for short options too.\n\
"), stdout);

  for (p = option_help_msgid;  *p;  p++)
    {
      if (!**p)
	putchar ('\n');
      else
	{
	  char const *msg = _(*p);
	  char const *nl;
	  while ((nl = strchr (msg, '\n')))
	    {
	      int msglen = nl + 1 - msg;
	      printf ("  %.*s", msglen, msg);
	      msg = nl + 1;
	    }

	  printf ("  %s\n" + 2 * (*msg != ' ' && *msg != '-'), msg);
	}
    }
  emit_bug_reporting_address ();
}

/* Set VAR to VALUE, reporting an OPTION error if this is a
   conflict.  */
static void
specify_value (char const **var, char const *value, char const *option)
{
  if (*var && ! STREQ (*var, value))
    {
      error (0, 0, _("conflicting %s option value '%s'"), option, value);
      try_help (NULL, NULL);
    }
  *var = value;
}

/* Set the output style to STYLE, diagnosing conflicts.  */
static void
specify_style (enum output_style style)
{
  if (output_style != style)
    {
      if (output_style != OUTPUT_UNSPECIFIED)
	try_help ("conflicting output style options", NULL);
      output_style = style;
    }
}

/* Set the last-modified time of *ST to be the current time.  */

static void
set_mtime_to_now (struct stat *st)
{
#ifdef STAT_TIMESPEC
  gettime (&STAT_TIMESPEC (st, st_mtim));
#else
  struct timespec t;
  gettime (&t);
  st->st_mtime = t.tv_sec;
# if defined STAT_TIMESPEC_NS
  STAT_TIMESPEC_NS (st, st_mtim) = t.tv_nsec;
# elif defined HAVE_STRUCT_STAT_ST_SPARE1
  st->st_spare1 = t.tv_nsec / 1000;
# endif
#endif
}

/* Compare two files (or dirs) with parent comparison PARENT
   and names NAME0 and NAME1.
   (If PARENT is null, then the first name is just NAME0, etc.)
   This is self-contained; it opens the files and closes them.

   Value is EXIT_SUCCESS if files are the same, EXIT_FAILURE if
   different, EXIT_TROUBLE if there is a problem opening them.  */

static int
compare_files (struct comparison const *parent,
	       char const *name0,
	       char const *name1)
{
  struct comparison cmp;
#define DIR_P(f) (S_ISDIR (cmp.file[f].stat.st_mode) != 0)
  register int f;
  int status = EXIT_SUCCESS;
  bool same_files;
  char *free0;
  char *free1;

  /* If this is directory comparison, perhaps we have a file
     that exists only in one of the directories.
     If so, just print a message to that effect.  */

  if (! ((name0 && name1)
	 || (unidirectional_new_file && name1)
	 || new_file))
    {
      char const *name = name0 ? name0 : name1;
      char const *dir = parent->file[!name0].name;

      /* See POSIX 1003.1-2001 for this format.  */
      message ("Only in %s: %s\n", dir, name);

      /* Return EXIT_FAILURE so that diff_dirs will return
	 EXIT_FAILURE ("some files differ").  */
      return EXIT_FAILURE;
    }

  memset (cmp.file, 0, sizeof cmp.file);
  cmp.parent = parent;

  /* cmp.file[f].desc markers */
#define NONEXISTENT (-1) /* nonexistent file */
#define UNOPENED (-2) /* unopened file (e.g. directory) */
#define ERRNO_ENCODE(errno) (-3 - (errno)) /* encoded errno value */

#define ERRNO_DECODE(desc) (-3 - (desc)) /* inverse of ERRNO_ENCODE */

  cmp.file[0].desc = name0 ? UNOPENED : NONEXISTENT;
  cmp.file[1].desc = name1 ? UNOPENED : NONEXISTENT;

  /* Now record the full name of each file, including nonexistent ones.  */

  if (!name0)
    name0 = name1;
  if (!name1)
    name1 = name0;

  if (!parent)
    {
      free0 = NULL;
      free1 = NULL;
      cmp.file[0].name = name0;
      cmp.file[1].name = name1;
    }
  else
    {
      cmp.file[0].name = free0
	= file_name_concat (parent->file[0].name, name0, NULL);
      cmp.file[1].name = free1
	= file_name_concat (parent->file[1].name, name1, NULL);
    }

  /* Stat the files.  */

  for (f = 0; f < 2; f++)
    {
      if (cmp.file[f].desc != NONEXISTENT)
	{
	  if (f && file_name_cmp (cmp.file[f].name, cmp.file[0].name) == 0)
	    {
	      cmp.file[f].desc = cmp.file[0].desc;
	      cmp.file[f].stat = cmp.file[0].stat;
	    }
	  else if (STREQ (cmp.file[f].name, "-"))
	    {
	      cmp.file[f].desc = STDIN_FILENO;
	      if (binary && ! isatty (STDIN_FILENO))
		set_binary_mode (STDIN_FILENO, O_BINARY);
	      if (fstat (STDIN_FILENO, &cmp.file[f].stat) != 0)
		cmp.file[f].desc = ERRNO_ENCODE (errno);
	      else
		{
		  if (S_ISREG (cmp.file[f].stat.st_mode))
		    {
		      off_t pos = lseek (STDIN_FILENO, 0, SEEK_CUR);
		      if (pos < 0)
			cmp.file[f].desc = ERRNO_ENCODE (errno);
		      else
			cmp.file[f].stat.st_size =
			  MAX (0, cmp.file[f].stat.st_size - pos);
		    }

		  /* POSIX 1003.1-2001 requires current time for
		     stdin.  */
		  set_mtime_to_now (&cmp.file[f].stat);
		}
	    }
	  else if ((no_dereference_symlinks
		    ? lstat (cmp.file[f].name, &cmp.file[f].stat)
		    : stat (cmp.file[f].name, &cmp.file[f].stat))
		   != 0)
	    cmp.file[f].desc = ERRNO_ENCODE (errno);
	}
    }

  /* Mark files as nonexistent as needed for -N and -P, if they are
     inaccessible empty regular files (the kind of files that 'patch'
     creates to indicate nonexistent backups), or if they are
     top-level files that do not exist but their counterparts do
     exist.  */
  for (f = 0; f < 2; f++)
    if ((new_file || (f == 0 && unidirectional_new_file))
	&& (cmp.file[f].desc == UNOPENED
	    ? (S_ISREG (cmp.file[f].stat.st_mode)
	       && ! (cmp.file[f].stat.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO))
	       && cmp.file[f].stat.st_size == 0)
	    : ((cmp.file[f].desc == ERRNO_ENCODE (ENOENT)
		|| cmp.file[f].desc == ERRNO_ENCODE (EBADF))
	       && ! parent
	       && (cmp.file[1 - f].desc == UNOPENED
		   || cmp.file[1 - f].desc == STDIN_FILENO))))
      cmp.file[f].desc = NONEXISTENT;

  for (f = 0; f < 2; f++)
    if (cmp.file[f].desc == NONEXISTENT)
      {
	memset (&cmp.file[f].stat, 0, sizeof cmp.file[f].stat);
	cmp.file[f].stat.st_mode = cmp.file[1 - f].stat.st_mode;
      }

  for (f = 0; f < 2; f++)
    {
      int e = ERRNO_DECODE (cmp.file[f].desc);
      if (0 <= e)
	{
	  errno = e;
	  perror_with_name (cmp.file[f].name);
	  status = EXIT_TROUBLE;
	}
    }

  if (status == EXIT_SUCCESS && ! parent && DIR_P (0) != DIR_P (1))
    {
      /* If one is a directory, and it was specified in the command line,
	 use the file in that dir with the other file's basename.  */

      int fnm_arg = DIR_P (0);
      int dir_arg = 1 - fnm_arg;
      char const *fnm = cmp.file[fnm_arg].name;
      char const *dir = cmp.file[dir_arg].name;
      char const *filename = cmp.file[dir_arg].name = free0
	= find_dir_file_pathname (dir, last_component (fnm));

      if (STREQ (fnm, "-"))
	fatal ("cannot compare '-' to a directory");

      if ((no_dereference_symlinks
	   ? lstat (filename, &cmp.file[dir_arg].stat)
	   : stat (filename, &cmp.file[dir_arg].stat))
	  != 0)
	{
	  perror_with_name (filename);
	  status = EXIT_TROUBLE;
	}
    }

  if (status != EXIT_SUCCESS)
    {
      /* One of the files should exist but does not.  */
    }
  else if (cmp.file[0].desc == NONEXISTENT
	   && cmp.file[1].desc == NONEXISTENT)
    {
      /* Neither file "exists", so there's nothing to compare.  */
    }
  else if ((same_files
	    = (cmp.file[0].desc != NONEXISTENT
	       && cmp.file[1].desc != NONEXISTENT
	       && 0 < same_file (&cmp.file[0].stat, &cmp.file[1].stat)
	       && same_file_attributes (&cmp.file[0].stat,
					&cmp.file[1].stat)))
	   && no_diff_means_no_output)
    {
      /* The two named files are actually the same physical file.
	 We know they are identical without actually reading them.  */
    }
  else if (DIR_P (0) & DIR_P (1))
    {
      if (output_style == OUTPUT_IFDEF)
	fatal ("-D option not supported with directories");

      /* If both are directories, compare the files in them.  */

      if (parent && !recursive)
	{
	  /* But don't compare dir contents one level down
	     unless -r was specified.
	     See POSIX 1003.1-2001 for this format.  */
	  message ("Common subdirectories: %s and %s\n",
		   cmp.file[0].name, cmp.file[1].name);
	}
      else
	status = diff_dirs (&cmp, compare_files);
    }
  else if ((DIR_P (0) | DIR_P (1))
	   || (parent
	       && !((S_ISREG (cmp.file[0].stat.st_mode)
		     || S_ISLNK (cmp.file[0].stat.st_mode))
		    && (S_ISREG (cmp.file[1].stat.st_mode)
			|| S_ISLNK  (cmp.file[1].stat.st_mode)))))
    {
      if (cmp.file[0].desc == NONEXISTENT || cmp.file[1].desc == NONEXISTENT)
	{
	  /* We have a subdirectory that exists only in one directory.  */

	  if ((DIR_P (0) | DIR_P (1))
	      && recursive
	      && (new_file
		  || (unidirectional_new_file
		      && cmp.file[0].desc == NONEXISTENT)))
	    status = diff_dirs (&cmp, compare_files);
	  else
	    {
	      char const *dir;

	      /* PARENT must be non-NULL here.  */
	      assert (parent);
	      dir = parent->file[cmp.file[0].desc == NONEXISTENT].name;

	      /* See POSIX 1003.1-2001 for this format.  */
	      message ("Only in %s: %s\n", dir, name0);

	      status = EXIT_FAILURE;
	    }
	}
      else
	{
	  /* We have two files that are not to be compared.  */

	  /* See POSIX 1003.1-2001 for this format.  */
	  message5 ("File %s is a %s while file %s is a %s\n",
		    file_label[0] ? file_label[0] : cmp.file[0].name,
		    file_type (&cmp.file[0].stat),
		    file_label[1] ? file_label[1] : cmp.file[1].name,
		    file_type (&cmp.file[1].stat));

	  /* This is a difference.  */
	  status = EXIT_FAILURE;
	}
    }
  else if (S_ISLNK (cmp.file[0].stat.st_mode)
	   || S_ISLNK (cmp.file[1].stat.st_mode))
    {
      /* We get here only if we use lstat(), not stat().  */
      assert (no_dereference_symlinks);

      if (S_ISLNK (cmp.file[0].stat.st_mode)
	  && S_ISLNK (cmp.file[1].stat.st_mode))
	{
	  /* Compare the values of the symbolic links.  */
	  char *link_value[2] = { NULL, NULL };

	  for (f = 0; f < 2; f++)
	    {
	      link_value[f] = xreadlink (cmp.file[f].name);
	      if (link_value[f] == NULL)
		{
		  perror_with_name (cmp.file[f].name);
		  status = EXIT_TROUBLE;
		  break;
		}
	    }
	  if (status == EXIT_SUCCESS)
	    {
	      if ( ! STREQ (link_value[0], link_value[1]))
		{
		  message ("Symbolic links %s and %s differ\n",
			   cmp.file[0].name, cmp.file[1].name);
		  /* This is a difference.  */
		  status = EXIT_FAILURE;
		}
	    }
	  for (f = 0; f < 2; f++)
	    free (link_value[f]);
	}
      else
	{
	  /* We have two files that are not to be compared, because
	     one of them is a symbolic link and the other one is not.  */

	  message5 ("File %s is a %s while file %s is a %s\n",
		    file_label[0] ? file_label[0] : cmp.file[0].name,
		    file_type (&cmp.file[0].stat),
		    file_label[1] ? file_label[1] : cmp.file[1].name,
		    file_type (&cmp.file[1].stat));

	  /* This is a difference.  */
	  status = EXIT_FAILURE;
	}
    }
  else if (files_can_be_treated_as_binary
	   && S_ISREG (cmp.file[0].stat.st_mode)
	   && S_ISREG (cmp.file[1].stat.st_mode)
	   && cmp.file[0].stat.st_size != cmp.file[1].stat.st_size)
    {
      message ("Files %s and %s differ\n",
	       file_label[0] ? file_label[0] : cmp.file[0].name,
	       file_label[1] ? file_label[1] : cmp.file[1].name);
      status = EXIT_FAILURE;
    }
  else
    {
      /* Both exist and neither is a directory.  */

      /* Open the files and record their descriptors.  */

      int oflags = O_RDONLY | (binary ? O_BINARY : 0);

      if (cmp.file[0].desc == UNOPENED)
	if ((cmp.file[0].desc = open (cmp.file[0].name, oflags, 0)) < 0)
	  {
	    perror_with_name (cmp.file[0].name);
	    status = EXIT_TROUBLE;
	  }
      if (cmp.file[1].desc == UNOPENED)
	{
	  if (same_files)
	    cmp.file[1].desc = cmp.file[0].desc;
	  else if ((cmp.file[1].desc = open (cmp.file[1].name, oflags, 0)) < 0)
	    {
	      perror_with_name (cmp.file[1].name);
	      status = EXIT_TROUBLE;
	    }
	}

      /* Compare the files, if no error was found.  */

      if (status == EXIT_SUCCESS)
	status = diff_2_files (&cmp);

      /* Close the file descriptors.  */

      if (0 <= cmp.file[0].desc && close (cmp.file[0].desc) != 0)
	{
	  perror_with_name (cmp.file[0].name);
	  status = EXIT_TROUBLE;
	}
      if (0 <= cmp.file[1].desc && cmp.file[0].desc != cmp.file[1].desc
	  && close (cmp.file[1].desc) != 0)
	{
	  perror_with_name (cmp.file[1].name);
	  status = EXIT_TROUBLE;
	}
    }

  /* Now the comparison has been done, if no error prevented it,
     and STATUS is the value this function will return.  */

  if (status == EXIT_SUCCESS)
    {
      if (report_identical_files && !DIR_P (0))
	message ("Files %s and %s are identical\n",
		 file_label[0] ? file_label[0] : cmp.file[0].name,
		 file_label[1] ? file_label[1] : cmp.file[1].name);
    }
  else
    {
      /* Flush stdout so that the user sees differences immediately.
	 This can hurt performance, unfortunately.  */
      if (fflush (stdout) != 0)
	pfatal_with_name (_("standard output"));
    }

  free (free0);
  free (free1);

  return status;
}
